import scrapy

from orange.items import OrangeItem


class OeSpider(scrapy.Spider):
    """Crawl the paginated listing and emit one ``OrangeItem`` per entry.

    The crawl starts at ``start_urls[0]`` and then follows the numbered
    listing pages built from ``base_url`` until ``max_page`` is reached.
    """

    name = "oe"
    allowed_domains = ["www.gengzhongbang.com"]
    start_urls = ["https://www.gengzhongbang.com/shuiguozhongzhi/ganju/ganjuzhongzhi/"]

    # Prefix of the paginated listing URLs; the page number is appended to it.
    base_url = "https://www.gengzhongbang.com/shuiguozhongzhi/ganju/ganjuzhongzhi/list-"
    # Number of the most recently scheduled listing page (the start URL is page 1).
    page = 1
    # Highest page number that will be requested; override to crawl more or fewer pages.
    max_page = 20

    def parse(self, response):
        """Extract title/link pairs from one listing page and schedule the next page.

        Args:
            response: The downloaded listing page.

        Yields:
            An ``OrangeItem`` (``title``, ``href``) for every ``div`` matched by
            the listing XPath, followed by a ``scrapy.Request`` for the next
            page while ``self.page`` is below ``self.max_page``.
        """
        # XPath notes kept for reference:
        #   image alt text : //dd[@class="xs2 cl"]/div//img/@alt
        #   entry title    : //dd[@class="xs2 cl"]/div/a/@title
        #   entry container: //dd[@class="xs2 cl"]
        #   entry link     : //dd[@class="xs2 cl"]/div/a/@href
        div_list = response.xpath("//dd[@class='xs2 cl']/div")

        for div in div_list:
            title = div.xpath("./a/@title").extract_first()
            wz = div.xpath("./a/@href").extract_first()
            # Use the spider's logger instead of print() so the output obeys
            # Scrapy's log level and log file settings.
            self.logger.debug("Parsed entry: title=%r href=%r", title, wz)

            # Yield items one at a time; Scrapy passes them on to the
            # project's item pipelines.
            yield OrangeItem(title=title, href=wz)

        # Follow-up pages live at base_url + <page number>, e.g.
        #   page 2 -> .../ganju/ganjuzhongzhi/list-2
        #   page 3 -> .../ganju/ganjuzhongzhi/list-3
        if self.page < self.max_page:
            self.page += 1
            url = self.base_url + str(self.page)
            yield scrapy.Request(url=url, callback=self.parse)
